PROJECT

In [40]:
# get information of the datatypes
# if there is any class data in the features,convert them into binary form
# look how ages of the data are looking and some information
# get the descriptive statistics of the above data
# construct the histogram, boxplot for every column using for loop
# get the corelation heatmap and find if there is association between certain columns
# choose the specific columns which have certain association and get the pairplot
In [41]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
In [42]:
r=pd.read_csv("C:\\Users\\ruchi\\Downloads\\Minsk2020_ALS_dataset.csv")
In [43]:
r
Out[43]:
ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a ... dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv} Diagnosis (ALS)
0 8 M 58 0.321817 0.141230 0.199128 0.923634 6.044559 3.196477 3.770575 ... -0.024467 -0.005300 0.051874 -0.037710 -0.026549 -0.021149 4.825476 2526.285657 833.498083 1
1 20 F 57 0.344026 0.177032 0.206458 0.827714 1.967728 0.856639 1.179851 ... 0.002485 -0.004535 -0.000225 -0.006977 -0.012510 0.014773 5.729322 1985.712014 561.802625 1
2 21 F 58 0.264740 0.148228 0.177078 0.532566 1.850893 0.942743 1.071950 ... -0.013927 0.007908 0.007960 -0.009022 -0.012488 -0.015588 8.258488 2364.695972 796.723440 1
3 22 F 70 0.455793 0.174870 0.243660 0.962641 2.883768 1.284926 1.915058 ... -0.019285 -0.021768 0.020495 0.035976 -0.034648 0.008021 5.447137 1860.172768 359.409974 1
4 24 M 66 0.269335 0.143961 0.167465 0.547745 2.327924 1.164109 1.420891 ... -0.005743 0.004726 -0.015247 0.003900 -0.007686 -0.003784 8.562517 2051.627447 817.111847 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
59 123 M 43 0.255799 0.123679 0.182658 0.505591 6.222031 2.876602 3.894294 ... 0.220533 0.089766 -0.120838 -0.004221 -0.013165 0.004642 9.855665 3128.341308 1990.937097 0
60 125 M 63 0.513175 0.296489 0.334845 0.729804 9.686563 4.327943 5.687977 ... 0.028016 -0.038739 0.011588 -0.011281 -0.004294 0.011239 11.094558 1964.218942 601.076046 0
61 127 F 67 0.383901 0.245923 0.251359 0.415136 4.148414 2.069757 2.527213 ... 0.011685 0.007883 -0.014839 0.013859 0.011145 0.001418 12.564742 2526.285657 934.343638 0
62 129 F 68 1.336216 0.815757 0.733197 0.981928 11.224542 5.295879 6.994751 ... 0.015712 0.013437 0.025113 0.008852 -0.010132 -0.008458 10.670669 3201.250289 2284.051658 0
63 131 F 60 0.916706 0.566121 0.512857 1.467165 6.372832 3.251168 3.539229 ... -0.046235 0.041946 -0.065313 -0.016682 0.061026 -0.005883 6.972152 2792.655884 1518.529172 0

64 rows × 135 columns

In [44]:
r=pd.DataFrame(r)
In [45]:
r.head()  ##first five rows
Out[45]:
ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a ... dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv} Diagnosis (ALS)
0 8 M 58 0.321817 0.141230 0.199128 0.923634 6.044559 3.196477 3.770575 ... -0.024467 -0.005300 0.051874 -0.037710 -0.026549 -0.021149 4.825476 2526.285657 833.498083 1
1 20 F 57 0.344026 0.177032 0.206458 0.827714 1.967728 0.856639 1.179851 ... 0.002485 -0.004535 -0.000225 -0.006977 -0.012510 0.014773 5.729322 1985.712014 561.802625 1
2 21 F 58 0.264740 0.148228 0.177078 0.532566 1.850893 0.942743 1.071950 ... -0.013927 0.007908 0.007960 -0.009022 -0.012488 -0.015588 8.258488 2364.695972 796.723440 1
3 22 F 70 0.455793 0.174870 0.243660 0.962641 2.883768 1.284926 1.915058 ... -0.019285 -0.021768 0.020495 0.035976 -0.034648 0.008021 5.447137 1860.172768 359.409974 1
4 24 M 66 0.269335 0.143961 0.167465 0.547745 2.327924 1.164109 1.420891 ... -0.005743 0.004726 -0.015247 0.003900 -0.007686 -0.003784 8.562517 2051.627447 817.111847 1

5 rows × 135 columns

In [46]:
r.tail()   ##last five rows
Out[46]:
ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a ... dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv} Diagnosis (ALS)
59 123 M 43 0.255799 0.123679 0.182658 0.505591 6.222031 2.876602 3.894294 ... 0.220533 0.089766 -0.120838 -0.004221 -0.013165 0.004642 9.855665 3128.341308 1990.937097 0
60 125 M 63 0.513175 0.296489 0.334845 0.729804 9.686563 4.327943 5.687977 ... 0.028016 -0.038739 0.011588 -0.011281 -0.004294 0.011239 11.094558 1964.218942 601.076046 0
61 127 F 67 0.383901 0.245923 0.251359 0.415136 4.148414 2.069757 2.527213 ... 0.011685 0.007883 -0.014839 0.013859 0.011145 0.001418 12.564742 2526.285657 934.343638 0
62 129 F 68 1.336216 0.815757 0.733197 0.981928 11.224542 5.295879 6.994751 ... 0.015712 0.013437 0.025113 0.008852 -0.010132 -0.008458 10.670669 3201.250289 2284.051658 0
63 131 F 60 0.916706 0.566121 0.512857 1.467165 6.372832 3.251168 3.539229 ... -0.046235 0.041946 -0.065313 -0.016682 0.061026 -0.005883 6.972152 2792.655884 1518.529172 0

5 rows × 135 columns

In [47]:
r.info()  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Columns: 135 entries, ID to Diagnosis (ALS)
dtypes: float64(131), int64(3), object(1)
memory usage: 67.6+ KB
In [48]:
r.shape
Out[48]:
(64, 135)
In [49]:
r.isnull().sum().sort_values(ascending=False) ##if null values are there are not
Out[49]:
ID                 0
Hi(3)_{mu}         0
Hi(1)_{rel}        0
Hi(8)_{sd}         0
Hi(7)_{sd}         0
                  ..
Ha(5)_{rel}        0
Ha(4)_{rel}        0
Ha(3)_{rel}        0
Ha(2)_{rel}        0
Diagnosis (ALS)    0
Length: 135, dtype: int64
In [50]:
r.columns
Out[50]:
Index(['ID', 'Sex', 'Age', 'J1_a', 'J3_a', 'J5_a', 'J55_a', 'S1_a', 'S3_a',
       'S5_a',
       ...
       'dCCi(7)', 'dCCi(8)', 'dCCi(9)', 'dCCi(10)', 'dCCi(11)', 'dCCi(12)',
       'd_1', 'F2_i', 'F2_{conv}', 'Diagnosis (ALS)'],
      dtype='object', length=135)
In [51]:
for i in r.columns:  ##see the all columns names eith data types
    print(f"'{i}' data type:{r[i].dtypes}")
'ID' data type:int64
'Sex' data type:object
'Age' data type:int64
'J1_a' data type:float64
'J3_a' data type:float64
'J5_a' data type:float64
'J55_a' data type:float64
'S1_a' data type:float64
'S3_a' data type:float64
'S5_a' data type:float64
'S11_a' data type:float64
'S55_a' data type:float64
'DPF_a' data type:float64
'PFR_a' data type:float64
'PPE_a' data type:float64
'PVI_a' data type:float64
'HNR_a' data type:float64
'GNEa_{\mu}' data type:float64
'GNEa_{\sigma}' data type:float64
'Ha(1)_{mu}' data type:float64
'Ha(2)_{mu}' data type:float64
'Ha(3)_{mu}' data type:float64
'Ha(4)_{mu}' data type:float64
'Ha(5)_{mu}' data type:float64
'Ha(6)_{mu}' data type:float64
'Ha(7)_{mu}' data type:float64
'Ha(8)_{mu}' data type:float64
'Ha(1)_{sd}' data type:float64
'Ha(2)_{sd}' data type:float64
'Ha(3)_{sd}' data type:float64
'Ha(4)_{sd}' data type:float64
'Ha(5)_{sd}' data type:float64
'Ha(6)_{sd}' data type:float64
'Ha(7)_{sd}' data type:float64
'Ha(8)_{sd}' data type:float64
'Ha(1)_{rel}' data type:float64
'Ha(2)_{rel}' data type:float64
'Ha(3)_{rel}' data type:float64
'Ha(4)_{rel}' data type:float64
'Ha(5)_{rel}' data type:float64
'Ha(6)_{rel}' data type:float64
'Ha(7)_{rel}' data type:float64
'Ha(8)_{rel}' data type:float64
'CCa(1)' data type:float64
'CCa(2)' data type:float64
'CCa(3)' data type:float64
'CCa(4)' data type:float64
'CCa(5)' data type:float64
'CCa(6)' data type:float64
'CCa(7)' data type:float64
'CCa(8)' data type:float64
'CCa(9)' data type:float64
'CCa(10)' data type:float64
'CCa(11)' data type:float64
'CCa(12)' data type:float64
'dCCa(1)' data type:float64
'dCCa(2)' data type:float64
'dCCa(3)' data type:float64
'dCCa(4)' data type:float64
'dCCa(5)' data type:float64
'dCCa(6)' data type:float64
'dCCa(7)' data type:float64
'dCCa(8)' data type:float64
'dCCa(9)' data type:float64
'dCCa(10)' data type:float64
'dCCa(11)' data type:float64
'dCCa(12)' data type:float64
'J1_i' data type:float64
'J3_i' data type:float64
'J5_i' data type:float64
'J55_i' data type:float64
'S1_i' data type:float64
'S3_i' data type:float64
'S5_i' data type:float64
'S11_i' data type:float64
'S55_i' data type:float64
'DPF_i' data type:float64
'PFR_i' data type:float64
'PPE_i' data type:float64
'PVI_i' data type:float64
'HNR_i' data type:float64
'GNEi_{\mu}' data type:float64
'GNEi_{\sigma}' data type:float64
'Hi(1)_{mu}' data type:float64
'Hi(2)_{mu}' data type:float64
'Hi(3)_{mu}' data type:float64
'Hi(4)_{mu}' data type:float64
'Hi(5)_{mu}' data type:float64
'Hi(6)_{mu}' data type:float64
'Hi(7)_{mu}' data type:float64
'Hi(8)_{mu}' data type:float64
'Hi(1)_{sd}' data type:float64
'Hi(2)_{sd}' data type:float64
'Hi(3)_{sd}' data type:float64
'Hi(4)_{sd}' data type:float64
'Hi(5)_{sd}' data type:float64
'Hi(6)_{sd}' data type:float64
'Hi(7)_{sd}' data type:float64
'Hi(8)_{sd}' data type:float64
'Hi(1)_{rel}' data type:float64
'Hi(2)_{rel}' data type:float64
'Hi(3)_{rel}' data type:float64
'Hi(4)_{rel}' data type:float64
'Hi(5)_{rel}' data type:float64
'Hi(6)_{rel}' data type:float64
'Hi(7)_{rel}' data type:float64
'Hi(8)_{rel}' data type:float64
'CCi(1)' data type:float64
'CCi(2)' data type:float64
'CCi(3)' data type:float64
'CCi(4)' data type:float64
'CCi(5)' data type:float64
'CCi(6)' data type:float64
'CCi(7)' data type:float64
'CCi(8)' data type:float64
'CCi(9)' data type:float64
'CCi(10)' data type:float64
'CCi(11)' data type:float64
'CCi(12)' data type:float64
'dCCi(1)' data type:float64
'dCCi(2)' data type:float64
'dCCi(3)' data type:float64
'dCCi(4)' data type:float64
'dCCi(5)' data type:float64
'dCCi(6)' data type:float64
'dCCi(7)' data type:float64
'dCCi(8)' data type:float64
'dCCi(9)' data type:float64
'dCCi(10)' data type:float64
'dCCi(11)' data type:float64
'dCCi(12)' data type:float64
'd_1' data type:float64
'F2_i' data type:float64
'F2_{conv}' data type:float64
'Diagnosis (ALS)' data type:int64
In [13]:
r["Sex"].value_counts() ##showing the count of gender
Out[13]:
Sex
F    34
M    30
Name: count, dtype: int64
In [14]:
print(r.dtypes) ##data type
ID                   int64
Sex                 object
Age                  int64
J1_a               float64
J3_a               float64
                    ...   
dCCi(12)           float64
d_1                float64
F2_i               float64
F2_{conv}          float64
Diagnosis (ALS)      int64
Length: 135, dtype: object
In [15]:
x=r.iloc[:,:-1]                       ##spliting it into training and testing data
y=r.iloc[:,-1]
print("training data:",x)
print("              "*3)
print("testing data:",y)
training data:      ID Sex  Age      J1_a      J3_a      J5_a     J55_a       S1_a      S3_a  \
0     8   M   58  0.321817  0.141230  0.199128  0.923634   6.044559  3.196477   
1    20   F   57  0.344026  0.177032  0.206458  0.827714   1.967728  0.856639   
2    21   F   58  0.264740  0.148228  0.177078  0.532566   1.850893  0.942743   
3    22   F   70  0.455793  0.174870  0.243660  0.962641   2.883768  1.284926   
4    24   M   66  0.269335  0.143961  0.167465  0.547745   2.327924  1.164109   
..  ...  ..  ...       ...       ...       ...       ...        ...       ...   
59  123   M   43  0.255799  0.123679  0.182658  0.505591   6.222031  2.876602   
60  125   M   63  0.513175  0.296489  0.334845  0.729804   9.686563  4.327943   
61  127   F   67  0.383901  0.245923  0.251359  0.415136   4.148414  2.069757   
62  129   F   68  1.336216  0.815757  0.733197  0.981928  11.224542  5.295879   
63  131   F   60  0.916706  0.566121  0.512857  1.467165   6.372832  3.251168   

        S5_a  ...   dCCi(6)   dCCi(7)   dCCi(8)   dCCi(9)  dCCi(10)  dCCi(11)  \
0   3.770575  ...  0.016809 -0.024467 -0.005300  0.051874 -0.037710 -0.026549   
1   1.179851  ...  0.019235  0.002485 -0.004535 -0.000225 -0.006977 -0.012510   
2   1.071950  ...  0.007199 -0.013927  0.007908  0.007960 -0.009022 -0.012488   
3   1.915058  ...  0.013213 -0.019285 -0.021768  0.020495  0.035976 -0.034648   
4   1.420891  ...  0.002948 -0.005743  0.004726 -0.015247  0.003900 -0.007686   
..       ...  ...       ...       ...       ...       ...       ...       ...   
59  3.894294  ... -0.060395  0.220533  0.089766 -0.120838 -0.004221 -0.013165   
60  5.687977  ... -0.046223  0.028016 -0.038739  0.011588 -0.011281 -0.004294   
61  2.527213  ... -0.005008  0.011685  0.007883 -0.014839  0.013859  0.011145   
62  6.994751  ...  0.025679  0.015712  0.013437  0.025113  0.008852 -0.010132   
63  3.539229  ... -0.072828 -0.046235  0.041946 -0.065313 -0.016682  0.061026   

    dCCi(12)        d_1         F2_i    F2_{conv}  
0  -0.021149   4.825476  2526.285657   833.498083  
1   0.014773   5.729322  1985.712014   561.802625  
2  -0.015588   8.258488  2364.695972   796.723440  
3   0.008021   5.447137  1860.172768   359.409974  
4  -0.003784   8.562517  2051.627447   817.111847  
..       ...        ...          ...          ...  
59  0.004642   9.855665  3128.341308  1990.937097  
60  0.011239  11.094558  1964.218942   601.076046  
61  0.001418  12.564742  2526.285657   934.343638  
62 -0.008458  10.670669  3201.250289  2284.051658  
63 -0.005883   6.972152  2792.655884  1518.529172  

[64 rows x 134 columns]
                                          
testing data: 0     1
1     1
2     1
3     1
4     1
     ..
59    0
60    0
61    0
62    0
63    0
Name: Diagnosis (ALS), Length: 64, dtype: int64
In [16]:
from sklearn.model_selection import train_test_split
In [17]:
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=1,test_size=0.15,stratify=y)
In [18]:
x_train
Out[18]:
ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a ... dCCi(6) dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv}
14 52 F 62 1.394205 0.839935 0.703224 1.362995 4.661601 2.402909 2.826132 ... -0.000823 0.025207 -0.002524 -0.006103 -0.012850 -0.015023 0.019711 12.862700 3092.653846 1552.852150
10 39 M 67 1.232990 0.561293 0.784691 1.889785 4.139457 1.915796 2.692091 ... 0.031624 -0.018089 0.018493 0.006547 -0.004817 -0.003710 -0.000692 2.276702 1686.160000 669.461749
59 123 M 43 0.255799 0.123679 0.182658 0.505591 6.222031 2.876602 3.894294 ... -0.060395 0.220533 0.089766 -0.120838 -0.004221 -0.013165 0.004642 9.855665 3128.341308 1990.937097
35 24 M 60 0.391222 0.200687 0.232286 0.638498 2.324343 1.181015 1.475889 ... -0.005405 -0.006063 0.028443 -0.006618 0.003011 -0.005547 -0.002806 10.525555 2700.419449 1393.549002
44 77 F 40 0.349185 0.213207 0.227957 0.347673 3.395795 1.826527 2.123809 ... -0.042489 -0.021018 0.049357 0.005870 -0.027345 -0.026138 0.009430 10.594026 3515.050257 1925.062482
26 94 F 55 0.764443 0.423008 0.472764 1.699466 8.254286 3.966819 5.010081 ... 0.018794 0.036827 0.029426 -0.024056 -0.065735 0.012986 0.012941 9.482607 2471.097222 1019.521207
20 72 F 64 0.797801 0.470116 0.522910 1.663179 7.778152 3.640445 4.355894 ... 0.028207 -0.052002 -0.038210 0.028143 0.033156 0.010924 0.020823 5.924309 1880.538263 482.819916
27 96 F 52 0.475047 0.308491 0.266091 0.396462 3.686641 1.999494 2.083195 ... 0.009989 0.000009 0.042347 -0.003170 0.002103 0.011023 -0.031324 11.597077 2792.655884 1457.933269
3 22 F 70 0.455793 0.174870 0.243660 0.962641 2.883768 1.284926 1.915058 ... 0.013213 -0.019285 -0.021768 0.020495 0.035976 -0.034648 0.008021 5.447137 1860.172768 359.409974
23 80 F 63 0.504802 0.253832 0.313823 1.229761 6.571067 2.833840 4.277576 ... 0.013513 -0.003382 0.027770 -0.005112 -0.027102 0.012612 -0.010165 7.339732 3556.978755 2129.076098
4 24 M 66 0.269335 0.143961 0.167465 0.547745 2.327924 1.164109 1.420891 ... 0.002948 -0.005743 0.004726 -0.015247 0.003900 -0.007686 -0.003784 8.562517 2051.627447 817.111847
53 111 F 60 0.121065 0.079473 0.101627 0.285497 1.117740 0.558948 0.615284 ... -0.005255 0.017626 -0.007311 -0.006541 -0.012625 0.013987 0.014222 11.261019 3201.250289 1960.299963
34 16 M 38 0.415366 0.254119 0.243906 0.449362 2.394697 1.292778 1.468358 ... 0.000016 0.008047 -0.005823 -0.006588 -0.000030 0.003137 -0.002440 8.531677 3515.050257 2441.219054
41 61 F 37 0.818954 0.484998 0.511515 0.721296 4.942091 2.724820 3.320166 ... 0.005945 0.021267 -0.008438 0.007339 -0.004838 -0.004133 0.000937 10.151495 2444.009071 1157.993834
57 119 F 65 0.356684 0.233194 0.242305 0.410246 3.251335 1.867689 1.908461 ... 0.005494 -0.003761 0.006982 0.012774 0.008384 0.002823 -0.014323 10.889792 2471.097222 1549.074209
62 129 F 68 1.336216 0.815757 0.733197 0.981928 11.224542 5.295879 6.994751 ... 0.025679 0.015712 0.013437 0.025113 0.008852 -0.010132 -0.008458 10.670669 3201.250289 2284.051658
51 107 M 80 0.532330 0.296012 0.322217 0.650045 8.042171 4.754839 5.060159 ... -0.014078 0.010696 -0.003448 -0.015049 0.010195 -0.002152 0.001881 9.472404 2238.670803 1241.852695
37 28 M 35 0.758571 0.464961 0.413086 0.605323 9.271523 5.707370 5.150374 ... -0.003686 0.018992 0.034957 0.016890 -0.009376 -0.008998 -0.007465 12.892692 2888.617021 1653.804580
13 48 F 63 0.805433 0.335434 0.487871 1.813700 4.485662 2.192828 2.864096 ... 0.005770 -0.028306 -0.035823 -0.004287 -0.014985 -0.004156 -0.005061 7.029500 2730.764545 1288.920905
7 28 M 58 1.210548 0.726523 0.661670 1.205596 8.492104 4.109625 5.377768 ... 0.007534 0.017088 0.015907 -0.006379 0.057303 -0.001919 -0.008007 8.422353 1964.218942 759.068477
33 6 M 41 1.063272 0.683918 0.504427 0.613402 3.176717 1.870164 1.590798 ... 0.017293 -0.008880 -0.001511 0.003965 0.015240 -0.006500 -0.008723 10.482453 2263.284796 801.333727
60 125 M 63 0.513175 0.296489 0.334845 0.729804 9.686563 4.327943 5.687977 ... -0.046223 0.028016 -0.038739 0.011588 -0.011281 -0.004294 0.011239 11.094558 1964.218942 601.076046
50 99 F 57 0.098881 0.065791 0.092655 0.363699 0.883453 0.449099 0.497111 ... 0.015152 -0.027019 0.037807 -0.019812 0.000436 -0.002840 -0.013161 10.108459 2143.018556 805.694015
15 55 M 61 1.177795 0.730069 0.569287 1.542224 5.883227 3.384535 2.875918 ... 0.028733 -0.004708 -0.004383 0.006398 -0.014412 -0.005085 0.005530 7.148809 2051.627447 784.563460
9 32 M 61 0.387730 0.213745 0.249993 0.591160 3.351240 1.874979 2.075762 ... 0.004241 0.006956 -0.002013 -0.000082 0.008275 -0.013829 -0.006955 7.572111 1780.825796 838.978523
32 4 F 53 0.598550 0.350577 0.378646 0.654399 4.224992 2.312947 2.678596 ... 0.001768 -0.003924 0.001995 0.004019 -0.007207 0.004614 0.003154 7.615608 2921.471038 1230.982918
45 81 F 60 0.286517 0.176603 0.195712 0.692300 1.847736 0.869278 1.099434 ... 0.023767 -0.020693 0.019078 -0.034556 -0.006731 -0.025442 -0.005668 12.874560 2670.464441 1309.871125
52 109 F 59 0.326851 0.215126 0.182667 0.304952 2.801295 1.669496 1.509331 ... -0.000688 -0.018500 0.030142 0.002111 0.006603 -0.013728 -0.021879 8.064921 2700.419449 1228.586973
18 64 M 57 0.426554 0.202661 0.255198 0.885479 4.387137 1.892587 2.628737 ... 0.003446 0.010420 0.005288 -0.006713 0.013147 -0.017816 0.017087 11.136041 2238.670803 930.223353
5 25 M 51 0.339593 0.182070 0.204186 0.505987 1.969217 0.834783 1.208688 ... 0.009548 -0.005489 0.010164 -0.008341 -0.006132 0.005441 0.003568 9.810520 2143.018556 1004.727725
29 100 M 69 0.511742 0.313666 0.320677 0.431433 7.417198 4.258109 4.961685 ... -0.013859 0.000308 -0.038984 0.048771 -0.014493 0.008647 -0.004524 2.512995 1233.583584 48.246203
24 84 F 55 0.419330 0.255329 0.338319 0.486463 4.366452 1.932596 2.855240 ... -0.005714 0.012337 -0.005475 -0.001179 -0.002687 0.005369 0.034289 11.766102 2888.617021 1839.961952
19 68 M 40 0.496922 0.213975 0.293201 0.957065 3.293700 1.672811 1.894214 ... 0.023989 -0.030137 0.024769 0.000567 0.005071 -0.023693 0.009770 7.872279 2143.018556 985.160918
30 102 F 53 0.561542 0.331788 0.345130 1.020709 6.074875 2.798090 3.526055 ... 0.022664 0.008537 -0.000306 -0.012570 -0.048113 0.009073 -0.005201 12.036001 2526.285657 1200.269866
54 113 F 62 0.823780 0.503064 0.497916 0.704065 6.861939 3.993216 4.174705 ... 0.014494 0.003410 0.000716 -0.010791 0.004878 -0.005527 -0.001659 7.388631 2096.808356 737.085571
6 27 M 57 0.691093 0.406901 0.406287 0.765986 6.168256 3.702088 3.286232 ... 0.002661 -0.012605 0.013385 0.004513 0.001568 -0.008244 0.005801 5.945219 2313.388825 1219.744513
48 89 F 45 0.155762 0.091831 0.117099 0.575170 1.575403 0.738640 0.834830 ... 0.002348 -0.002313 0.008766 0.006330 0.016645 -0.003417 0.002115 10.713432 3599.554394 2226.127951
2 21 F 58 0.264740 0.148228 0.177078 0.532566 1.850893 0.942743 1.071950 ... 0.007199 -0.013927 0.007908 0.007960 -0.009022 -0.012488 -0.015588 8.258488 2364.695972 796.723440
12 46 F 50 0.199868 0.132266 0.157546 0.454599 1.467287 0.706004 0.819799 ... 0.001819 0.011291 -0.001916 0.001513 -0.002502 0.002014 0.010446 10.950821 3164.536485 1553.425003
55 115 F 50 0.462076 0.269359 0.296952 0.909990 5.020638 2.305690 2.837745 ... 0.001699 -0.001674 0.012043 -0.003015 0.022928 -0.005444 -0.006394 14.651111 3515.050257 2210.936432
22 78 F 64 0.437128 0.265519 0.349086 0.493971 3.205305 1.515726 2.119909 ... -0.060397 0.056134 -0.069202 -0.040180 -0.058987 0.044552 0.077897 2.986929 1800.351911 784.205580
49 97 F 39 0.463874 0.290374 0.278926 0.550290 2.449028 1.372602 1.412486 ... -0.000234 0.011520 0.009453 0.002072 -0.002811 0.004577 -0.009058 10.616077 3164.536485 2037.766311
28 98 M 68 1.076446 0.624824 0.561100 1.965493 7.001105 3.785627 4.033533 ... -0.011602 0.018274 0.004485 0.005101 -0.007367 0.007041 -0.001560 11.097431 3515.050257 2157.871393
46 85 F 55 0.454844 0.289458 0.293609 0.582757 2.124786 1.170689 1.182154 ... 0.006111 0.003214 0.007520 -0.024862 0.016600 -0.015827 0.002088 12.189059 3092.653846 1551.286187
16 58 M 58 5.391649 3.217293 3.321567 5.991336 29.441589 16.791944 18.368778 ... -0.002691 0.008994 0.025390 0.040231 0.003503 -0.006546 -0.024835 8.008742 1921.927690 583.380671
63 131 F 60 0.916706 0.566121 0.512857 1.467165 6.372832 3.251168 3.539229 ... -0.072828 -0.046235 0.041946 -0.065313 -0.016682 0.061026 -0.005883 6.972152 2792.655884 1518.529172
21 76 M 68 0.379367 0.223560 0.308426 1.821009 5.539475 2.551095 3.000675 ... 0.008846 -0.006281 -0.004169 -0.008164 0.012667 -0.015442 0.001157 5.218871 1649.621788 177.843734
58 121 F 67 0.237654 0.154117 0.154312 0.544602 3.583597 1.690501 1.840416 ... -0.027062 -0.002917 0.020006 -0.064556 0.014446 0.015248 -0.013882 9.311776 2670.464441 1087.940178
61 127 F 67 0.383901 0.245923 0.251359 0.415136 4.148414 2.069757 2.527213 ... -0.005008 0.011685 0.007883 -0.014839 0.013859 0.011145 0.001418 12.564742 2526.285657 934.343638
38 42 M 60 0.182721 0.089174 0.113216 0.651748 1.839764 0.673977 1.084464 ... 0.021454 0.007635 0.019134 0.013320 0.002002 -0.022146 0.005825 15.420777 2313.388825 1381.628235
11 42 M 67 1.608454 0.989100 0.894706 1.304613 6.031953 3.279233 3.729620 ... 0.019701 0.004293 0.026891 0.080337 -0.007352 0.055993 0.075071 10.674106 2007.441819 481.009629
47 86 F 63 0.753234 0.467912 0.442792 0.540327 4.801768 2.677700 3.241265 ... -0.018937 0.011239 0.011488 0.015334 0.008337 0.005474 0.011687 10.515820 3057.463491 1494.054076
31 2 F 64 0.219429 0.144385 0.171661 0.555528 2.054277 1.108746 1.260472 ... -0.011558 -0.004168 0.018603 0.019350 0.016342 0.014127 0.022756 10.121803 2988.533127 1332.559788
56 117 M 49 0.319535 0.143602 0.193376 0.846877 4.171980 1.868911 2.581316 ... -0.033465 0.019372 -0.041478 0.004583 -0.013913 -0.007205 0.013409 10.286022 1985.712014 804.666593

54 rows × 134 columns

In [19]:
x_test
Out[19]:
ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a ... dCCi(6) dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv}
8 31 M 67 0.537010 0.243234 0.318075 1.951256 6.568645 2.891654 4.620345 ... 0.014234 0.013594 -0.003722 -0.003859 -0.011985 -0.027240 0.002325 8.760510 1840.022120 669.022078
39 49 M 38 0.176448 0.103346 0.123381 0.649644 2.649815 1.410593 1.504265 ... 0.016461 0.014798 -0.011568 -0.009276 -0.003996 -0.003248 -0.005049 10.074644 2119.782609 580.338238
25 92 F 39 2.061820 1.219720 1.280772 1.306779 15.095252 7.876805 9.839396 ... 0.009492 -0.033642 -0.032431 -0.056497 -0.003210 0.019443 0.004703 7.986808 3164.536485 1734.620853
17 62 M 57 0.351191 0.165077 0.227554 0.849025 3.695872 1.776465 2.379529 ... 0.024865 -0.008019 0.005046 0.023446 -0.000726 -0.016418 0.010174 6.031056 2074.091402 927.063276
40 53 M 60 0.487857 0.208116 0.282759 1.183502 4.734293 2.542676 3.053723 ... -0.003976 0.001242 0.005562 -0.000695 0.003333 0.004020 -0.005159 9.943562 2471.097222 1212.922300
1 20 F 57 0.344026 0.177032 0.206458 0.827714 1.967728 0.856639 1.179851 ... 0.019235 0.002485 -0.004535 -0.000225 -0.006977 -0.012510 0.014773 5.729322 1985.712014 561.802625
36 26 M 34 0.163026 0.094282 0.112654 0.378197 2.617874 1.361172 1.550565 ... -0.010711 0.006573 0.003950 -0.003954 0.001273 0.005890 0.000385 8.471564 2700.419449 1293.468915
42 63 F 50 0.323175 0.126138 0.176197 0.907850 3.069980 1.372204 1.869070 ... 0.004152 -0.001733 0.030566 -0.037259 -0.020167 -0.042955 -0.083977 9.167460 444.730268 1169.075556
43 65 M 52 1.529994 0.894926 0.809959 1.715071 7.627483 4.321696 4.415383 ... 0.015596 -0.012218 0.015828 -0.014697 0.010636 0.004654 -0.003235 7.758796 2761.507400 1641.852909
0 8 M 58 0.321817 0.141230 0.199128 0.923634 6.044559 3.196477 3.770575 ... 0.016809 -0.024467 -0.005300 0.051874 -0.037710 -0.026549 -0.021149 4.825476 2526.285657 833.498083

10 rows × 134 columns

In [20]:
y_train
Out[20]:
14    1
10    1
59    0
35    0
44    0
26    1
20    1
27    1
3     1
23    1
4     1
53    0
34    0
41    0
57    0
62    0
51    0
37    0
13    1
7     1
33    0
60    0
50    0
15    1
9     1
32    0
45    0
52    0
18    1
5     1
29    1
24    1
19    1
30    1
54    0
6     1
48    0
2     1
12    1
55    0
22    1
49    0
28    1
46    0
16    1
63    0
21    1
58    0
61    0
38    0
11    1
47    0
31    0
56    0
Name: Diagnosis (ALS), dtype: int64
In [21]:
y_test
Out[21]:
8     1
39    0
25    1
17    1
40    0
1     1
36    0
42    0
43    0
0     1
Name: Diagnosis (ALS), dtype: int64
In [22]:
print(r.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64 entries, 0 to 63
Columns: 135 entries, ID to Diagnosis (ALS)
dtypes: float64(131), int64(3), object(1)
memory usage: 67.6+ KB
None
In [23]:
plt.figure(figsize=(20,10))
age=sns.countplot(x="Age",data=r)
for bars in age.containers:
    age.bar_label(bars)
No description has been provided for this image
In [24]:
gender=sns.countplot(x="Sex",data=r)
for bars in gender.containers:
    gender.bar_label(bars)
No description has been provided for this image
In [25]:
r["Sex"]=r["Sex"].replace({"M": 0,"F": 1})   ##categorical to binary
In [26]:
r["Sex"]
Out[26]:
0     0
1     1
2     1
3     1
4     0
     ..
59    0
60    0
61    1
62    1
63    1
Name: Sex, Length: 64, dtype: int64
In [27]:
r.head()
Out[27]:
ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a ... dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv} Diagnosis (ALS)
0 8 0 58 0.321817 0.141230 0.199128 0.923634 6.044559 3.196477 3.770575 ... -0.024467 -0.005300 0.051874 -0.037710 -0.026549 -0.021149 4.825476 2526.285657 833.498083 1
1 20 1 57 0.344026 0.177032 0.206458 0.827714 1.967728 0.856639 1.179851 ... 0.002485 -0.004535 -0.000225 -0.006977 -0.012510 0.014773 5.729322 1985.712014 561.802625 1
2 21 1 58 0.264740 0.148228 0.177078 0.532566 1.850893 0.942743 1.071950 ... -0.013927 0.007908 0.007960 -0.009022 -0.012488 -0.015588 8.258488 2364.695972 796.723440 1
3 22 1 70 0.455793 0.174870 0.243660 0.962641 2.883768 1.284926 1.915058 ... -0.019285 -0.021768 0.020495 0.035976 -0.034648 0.008021 5.447137 1860.172768 359.409974 1
4 24 0 66 0.269335 0.143961 0.167465 0.547745 2.327924 1.164109 1.420891 ... -0.005743 0.004726 -0.015247 0.003900 -0.007686 -0.003784 8.562517 2051.627447 817.111847 1

5 rows × 135 columns

In [28]:
stats=r.describe().T ##statistics
In [29]:
print(stats)
                 count         mean         std         min          25%  \
ID                64.0    67.875000   37.358198    2.000000    31.750000   
Sex               64.0     0.531250    0.502967    0.000000     0.000000   
Age               64.0    56.390625   10.203668   34.000000    50.750000   
J1_a              64.0     0.658951    0.724002    0.098881     0.325932   
J3_a              64.0     0.379242    0.435636    0.065791     0.172422   
...                ...          ...         ...         ...          ...   
dCCi(12)          64.0     0.001269    0.020800   -0.083977    -0.006534   
d_1               64.0     9.164473    2.681449    2.276702     7.604734   
F2_i              64.0  2495.116475  617.755856  444.730268  2051.627447   
F2_{conv}         64.0  1209.976405  553.694046   48.246203   800.181156   
Diagnosis (ALS)   64.0     0.484375    0.503706    0.000000     0.000000   

                         50%          75%          max  
ID                 66.500000    98.250000   131.000000  
Sex                 1.000000     1.000000     1.000000  
Age                58.000000    63.250000    80.000000  
J1_a                0.458935     0.772783     5.391649  
J3_a                0.253976     0.465699     3.217293  
...                      ...          ...          ...  
dCCi(12)            0.000661     0.009515     0.077897  
d_1                 9.646564    10.757522    15.420777  
F2_i             2471.097222  2938.236560  3599.554394  
F2_{conv}        1206.596083  1551.677678  2441.219054  
Diagnosis (ALS)     0.000000     1.000000     1.000000  

[135 rows x 8 columns]
In [30]:
r.mode()
Out[30]:
ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a ... dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv} Diagnosis (ALS)
0 24.0 1.0 60.0 0.098881 0.065791 0.092655 0.285497 0.883453 0.449099 0.497111 ... -0.052002 -0.069202 -0.120838 -0.065735 -0.042955 -0.083977 2.276702 3515.050257 48.246203 0.0
1 28.0 NaN NaN 0.121065 0.079473 0.101627 0.304952 1.117740 0.558948 0.615284 ... -0.046235 -0.041478 -0.065313 -0.058987 -0.034648 -0.031324 2.512995 NaN 177.843734 NaN
2 42.0 NaN NaN 0.155762 0.089174 0.112654 0.347673 1.467287 0.673977 0.819799 ... -0.033642 -0.038984 -0.064556 -0.048113 -0.027240 -0.024835 2.986929 NaN 359.409974 NaN
3 NaN NaN NaN 0.163026 0.091831 0.113216 0.363699 1.575403 0.706004 0.834830 ... -0.030137 -0.038739 -0.056497 -0.037710 -0.026549 -0.021879 4.825476 NaN 481.009629 NaN
4 NaN NaN NaN 0.176448 0.094282 0.117099 0.378197 1.839764 0.738640 1.071950 ... -0.028306 -0.038210 -0.040180 -0.027345 -0.026138 -0.021149 5.218871 NaN 482.819916 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
59 NaN NaN NaN 1.394205 0.839935 0.784691 1.821009 9.271523 4.754839 5.377768 ... 0.025207 0.037807 0.028143 0.016645 0.015248 0.020823 12.862700 NaN 2157.871393 NaN
60 NaN NaN NaN 1.529994 0.894926 0.809959 1.889785 9.686563 5.295879 5.687977 ... 0.028016 0.041946 0.040231 0.022928 0.019443 0.022756 12.874560 NaN 2210.936432 NaN
61 NaN NaN NaN 1.608454 0.989100 0.894706 1.951256 11.224542 5.707370 6.994751 ... 0.036827 0.042347 0.048771 0.033156 0.044552 0.034289 12.892692 NaN 2226.127951 NaN
62 NaN NaN NaN 2.061820 1.219720 1.280772 1.965493 15.095252 7.876805 9.839396 ... 0.056134 0.049357 0.051874 0.035976 0.055993 0.075071 14.651111 NaN 2284.051658 NaN
63 NaN NaN NaN 5.391649 3.217293 3.321567 5.991336 29.441589 16.791944 18.368778 ... 0.220533 0.089766 0.080337 0.057303 0.061026 0.077897 15.420777 NaN 2441.219054 NaN

64 rows × 135 columns

In [31]:
columns=r.columns
In [32]:
len(r.columns)
Out[32]:
135
In [33]:
135/5
Out[33]:
27.0
In [34]:
for column in r:
    plt.figure(figsize=(10,4))
    plt.subplot(1,2,1)
    sns.histplot(r[column],bins=8,color="red",kde=True)
    plt.title(f"Histogram of "+column)
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [35]:
for col in columns:
    plt.figure(figsize=(10,4))
    plt.subplot(1,2,1)
    sns.boxplot(x=r[col])
    plt.title("boxplot of" +col)
    plt.xlabel(col)
    plt.ylabel("Frequency")
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [36]:
corr_matrix=r.corr()
In [37]:
correlation_matrix=r.corr()
In [38]:
plt.figure(figsize=(200,200))
sns.heatmap(data=correlation_matrix,vmax=None,annot=True,fmt=".2g")
plt.title("correlation Heatmap")
plt.show()
plt.tight_layout()
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
In [52]:
pd.crosstab(r.Sex,r['Diagnosis(ALS)'])
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3791, in Index.get_loc(self, key)
   3790 try:
-> 3791     return self._engine.get_loc(casted_key)
   3792 except KeyError as err:

File index.pyx:152, in pandas._libs.index.IndexEngine.get_loc()

File index.pyx:181, in pandas._libs.index.IndexEngine.get_loc()

File pandas\_libs\hashtable_class_helper.pxi:7080, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas\_libs\hashtable_class_helper.pxi:7088, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'Diagnosis(ALS)'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[52], line 1
----> 1 pd.crosstab(r.Sex,r['Diagnosis(ALS)'])

File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:3893, in DataFrame.__getitem__(self, key)
   3891 if self.columns.nlevels > 1:
   3892     return self._getitem_multilevel(key)
-> 3893 indexer = self.columns.get_loc(key)
   3894 if is_integer(indexer):
   3895     indexer = [indexer]

File ~\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3798, in Index.get_loc(self, key)
   3793     if isinstance(casted_key, slice) or (
   3794         isinstance(casted_key, abc.Iterable)
   3795         and any(isinstance(x, slice) for x in casted_key)
   3796     ):
   3797         raise InvalidIndexError(key)
-> 3798     raise KeyError(key) from err
   3799 except TypeError:
   3800     # If we have a listlike key, _check_indexing_error will raise
   3801     #  InvalidIndexError. Otherwise we fall through and re-raise
   3802     #  the TypeError.
   3803     self._check_indexing_error(key)

KeyError: 'Diagnosis(ALS)'
In [53]:
pd.crosstab(r.Age,r.Sex)
Out[53]:
Sex F M
Age
34 0 1
35 0 1
37 1 0
38 0 2
39 2 0
40 1 1
41 0 1
43 0 1
45 1 0
49 0 1
50 3 0
51 0 1
52 1 1
53 2 0
55 3 0
57 2 3
58 1 3
59 1 0
60 3 3
61 0 2
62 2 0
63 3 1
64 3 0
65 1 0
66 0 1
67 2 3
68 1 2
69 0 1
70 1 0
80 0 1
In [54]:
sns.pairplot(data=r.iloc[:,2:15],palette=dict)
Out[54]:
<seaborn.axisgrid.PairGrid at 0x24b1b7e2310>
No description has been provided for this image
In [55]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,BaggingClassifier,GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold,StratifiedKFold,LeaveOneOut,ShuffleSplit
from sklearn.metrics import accuracy_score,classification_report
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
In [56]:
x=r.iloc[:,:-1]
y=r.iloc[:,-1]
In [57]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=18)
In [58]:
x_train
Out[58]:
ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a ... dCCi(6) dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv}
30 102 F 53 0.561542 0.331788 0.345130 1.020709 6.074875 2.798090 3.526055 ... 0.022664 0.008537 -0.000306 -0.012570 -0.048113 0.009073 -0.005201 12.036001 2526.285657 1200.269866
39 49 M 38 0.176448 0.103346 0.123381 0.649644 2.649815 1.410593 1.504265 ... 0.016461 0.014798 -0.011568 -0.009276 -0.003996 -0.003248 -0.005049 10.074644 2119.782609 580.338238
54 113 F 62 0.823780 0.503064 0.497916 0.704065 6.861939 3.993216 4.174705 ... 0.014494 0.003410 0.000716 -0.010791 0.004878 -0.005527 -0.001659 7.388631 2096.808356 737.085571
52 109 F 59 0.326851 0.215126 0.182667 0.304952 2.801295 1.669496 1.509331 ... -0.000688 -0.018500 0.030142 0.002111 0.006603 -0.013728 -0.021879 8.064921 2700.419449 1228.586973
45 81 F 60 0.286517 0.176603 0.195712 0.692300 1.847736 0.869278 1.099434 ... 0.023767 -0.020693 0.019078 -0.034556 -0.006731 -0.025442 -0.005668 12.874560 2670.464441 1309.871125
61 127 F 67 0.383901 0.245923 0.251359 0.415136 4.148414 2.069757 2.527213 ... -0.005008 0.011685 0.007883 -0.014839 0.013859 0.011145 0.001418 12.564742 2526.285657 934.343638
28 98 M 68 1.076446 0.624824 0.561100 1.965493 7.001105 3.785627 4.033533 ... -0.011602 0.018274 0.004485 0.005101 -0.007367 0.007041 -0.001560 11.097431 3515.050257 2157.871393
58 121 F 67 0.237654 0.154117 0.154312 0.544602 3.583597 1.690501 1.840416 ... -0.027062 -0.002917 0.020006 -0.064556 0.014446 0.015248 -0.013882 9.311776 2670.464441 1087.940178
23 80 F 63 0.504802 0.253832 0.313823 1.229761 6.571067 2.833840 4.277576 ... 0.013513 -0.003382 0.027770 -0.005112 -0.027102 0.012612 -0.010165 7.339732 3556.978755 2129.076098
33 6 M 41 1.063272 0.683918 0.504427 0.613402 3.176717 1.870164 1.590798 ... 0.017293 -0.008880 -0.001511 0.003965 0.015240 -0.006500 -0.008723 10.482453 2263.284796 801.333727
41 61 F 37 0.818954 0.484998 0.511515 0.721296 4.942091 2.724820 3.320166 ... 0.005945 0.021267 -0.008438 0.007339 -0.004838 -0.004133 0.000937 10.151495 2444.009071 1157.993834
15 55 M 61 1.177795 0.730069 0.569287 1.542224 5.883227 3.384535 2.875918 ... 0.028733 -0.004708 -0.004383 0.006398 -0.014412 -0.005085 0.005530 7.148809 2051.627447 784.563460
31 2 F 64 0.219429 0.144385 0.171661 0.555528 2.054277 1.108746 1.260472 ... -0.011558 -0.004168 0.018603 0.019350 0.016342 0.014127 0.022756 10.121803 2988.533127 1332.559788
27 96 F 52 0.475047 0.308491 0.266091 0.396462 3.686641 1.999494 2.083195 ... 0.009989 0.000009 0.042347 -0.003170 0.002103 0.011023 -0.031324 11.597077 2792.655884 1457.933269
26 94 F 55 0.764443 0.423008 0.472764 1.699466 8.254286 3.966819 5.010081 ... 0.018794 0.036827 0.029426 -0.024056 -0.065735 0.012986 0.012941 9.482607 2471.097222 1019.521207
59 123 M 43 0.255799 0.123679 0.182658 0.505591 6.222031 2.876602 3.894294 ... -0.060395 0.220533 0.089766 -0.120838 -0.004221 -0.013165 0.004642 9.855665 3128.341308 1990.937097
35 24 M 60 0.391222 0.200687 0.232286 0.638498 2.324343 1.181015 1.475889 ... -0.005405 -0.006063 0.028443 -0.006618 0.003011 -0.005547 -0.002806 10.525555 2700.419449 1393.549002
40 53 M 60 0.487857 0.208116 0.282759 1.183502 4.734293 2.542676 3.053723 ... -0.003976 0.001242 0.005562 -0.000695 0.003333 0.004020 -0.005159 9.943562 2471.097222 1212.922300
29 100 M 69 0.511742 0.313666 0.320677 0.431433 7.417198 4.258109 4.961685 ... -0.013859 0.000308 -0.038984 0.048771 -0.014493 0.008647 -0.004524 2.512995 1233.583584 48.246203
0 8 M 58 0.321817 0.141230 0.199128 0.923634 6.044559 3.196477 3.770575 ... 0.016809 -0.024467 -0.005300 0.051874 -0.037710 -0.026549 -0.021149 4.825476 2526.285657 833.498083
18 64 M 57 0.426554 0.202661 0.255198 0.885479 4.387137 1.892587 2.628737 ... 0.003446 0.010420 0.005288 -0.006713 0.013147 -0.017816 0.017087 11.136041 2238.670803 930.223353
55 115 F 50 0.462076 0.269359 0.296952 0.909990 5.020638 2.305690 2.837745 ... 0.001699 -0.001674 0.012043 -0.003015 0.022928 -0.005444 -0.006394 14.651111 3515.050257 2210.936432
13 48 F 63 0.805433 0.335434 0.487871 1.813700 4.485662 2.192828 2.864096 ... 0.005770 -0.028306 -0.035823 -0.004287 -0.014985 -0.004156 -0.005061 7.029500 2730.764545 1288.920905
57 119 F 65 0.356684 0.233194 0.242305 0.410246 3.251335 1.867689 1.908461 ... 0.005494 -0.003761 0.006982 0.012774 0.008384 0.002823 -0.014323 10.889792 2471.097222 1549.074209
6 27 M 57 0.691093 0.406901 0.406287 0.765986 6.168256 3.702088 3.286232 ... 0.002661 -0.012605 0.013385 0.004513 0.001568 -0.008244 0.005801 5.945219 2313.388825 1219.744513
14 52 F 62 1.394205 0.839935 0.703224 1.362995 4.661601 2.402909 2.826132 ... -0.000823 0.025207 -0.002524 -0.006103 -0.012850 -0.015023 0.019711 12.862700 3092.653846 1552.852150
51 107 M 80 0.532330 0.296012 0.322217 0.650045 8.042171 4.754839 5.060159 ... -0.014078 0.010696 -0.003448 -0.015049 0.010195 -0.002152 0.001881 9.472404 2238.670803 1241.852695
9 32 M 61 0.387730 0.213745 0.249993 0.591160 3.351240 1.874979 2.075762 ... 0.004241 0.006956 -0.002013 -0.000082 0.008275 -0.013829 -0.006955 7.572111 1780.825796 838.978523
3 22 F 70 0.455793 0.174870 0.243660 0.962641 2.883768 1.284926 1.915058 ... 0.013213 -0.019285 -0.021768 0.020495 0.035976 -0.034648 0.008021 5.447137 1860.172768 359.409974
1 20 F 57 0.344026 0.177032 0.206458 0.827714 1.967728 0.856639 1.179851 ... 0.019235 0.002485 -0.004535 -0.000225 -0.006977 -0.012510 0.014773 5.729322 1985.712014 561.802625
43 65 M 52 1.529994 0.894926 0.809959 1.715071 7.627483 4.321696 4.415383 ... 0.015596 -0.012218 0.015828 -0.014697 0.010636 0.004654 -0.003235 7.758796 2761.507400 1641.852909
4 24 M 66 0.269335 0.143961 0.167465 0.547745 2.327924 1.164109 1.420891 ... 0.002948 -0.005743 0.004726 -0.015247 0.003900 -0.007686 -0.003784 8.562517 2051.627447 817.111847
36 26 M 34 0.163026 0.094282 0.112654 0.378197 2.617874 1.361172 1.550565 ... -0.010711 0.006573 0.003950 -0.003954 0.001273 0.005890 0.000385 8.471564 2700.419449 1293.468915
11 42 M 67 1.608454 0.989100 0.894706 1.304613 6.031953 3.279233 3.729620 ... 0.019701 0.004293 0.026891 0.080337 -0.007352 0.055993 0.075071 10.674106 2007.441819 481.009629
10 39 M 67 1.232990 0.561293 0.784691 1.889785 4.139457 1.915796 2.692091 ... 0.031624 -0.018089 0.018493 0.006547 -0.004817 -0.003710 -0.000692 2.276702 1686.160000 669.461749
63 131 F 60 0.916706 0.566121 0.512857 1.467165 6.372832 3.251168 3.539229 ... -0.072828 -0.046235 0.041946 -0.065313 -0.016682 0.061026 -0.005883 6.972152 2792.655884 1518.529172
47 86 F 63 0.753234 0.467912 0.442792 0.540327 4.801768 2.677700 3.241265 ... -0.018937 0.011239 0.011488 0.015334 0.008337 0.005474 0.011687 10.515820 3057.463491 1494.054076
21 76 M 68 0.379367 0.223560 0.308426 1.821009 5.539475 2.551095 3.000675 ... 0.008846 -0.006281 -0.004169 -0.008164 0.012667 -0.015442 0.001157 5.218871 1649.621788 177.843734
60 125 M 63 0.513175 0.296489 0.334845 0.729804 9.686563 4.327943 5.687977 ... -0.046223 0.028016 -0.038739 0.011588 -0.011281 -0.004294 0.011239 11.094558 1964.218942 601.076046
17 62 M 57 0.351191 0.165077 0.227554 0.849025 3.695872 1.776465 2.379529 ... 0.024865 -0.008019 0.005046 0.023446 -0.000726 -0.016418 0.010174 6.031056 2074.091402 927.063276
34 16 M 38 0.415366 0.254119 0.243906 0.449362 2.394697 1.292778 1.468358 ... 0.000016 0.008047 -0.005823 -0.006588 -0.000030 0.003137 -0.002440 8.531677 3515.050257 2441.219054
24 84 F 55 0.419330 0.255329 0.338319 0.486463 4.366452 1.932596 2.855240 ... -0.005714 0.012337 -0.005475 -0.001179 -0.002687 0.005369 0.034289 11.766102 2888.617021 1839.961952
8 31 M 67 0.537010 0.243234 0.318075 1.951256 6.568645 2.891654 4.620345 ... 0.014234 0.013594 -0.003722 -0.003859 -0.011985 -0.027240 0.002325 8.760510 1840.022120 669.022078
2 21 F 58 0.264740 0.148228 0.177078 0.532566 1.850893 0.942743 1.071950 ... 0.007199 -0.013927 0.007908 0.007960 -0.009022 -0.012488 -0.015588 8.258488 2364.695972 796.723440
50 99 F 57 0.098881 0.065791 0.092655 0.363699 0.883453 0.449099 0.497111 ... 0.015152 -0.027019 0.037807 -0.019812 0.000436 -0.002840 -0.013161 10.108459 2143.018556 805.694015
46 85 F 55 0.454844 0.289458 0.293609 0.582757 2.124786 1.170689 1.182154 ... 0.006111 0.003214 0.007520 -0.024862 0.016600 -0.015827 0.002088 12.189059 3092.653846 1551.286187
49 97 F 39 0.463874 0.290374 0.278926 0.550290 2.449028 1.372602 1.412486 ... -0.000234 0.011520 0.009453 0.002072 -0.002811 0.004577 -0.009058 10.616077 3164.536485 2037.766311
5 25 M 51 0.339593 0.182070 0.204186 0.505987 1.969217 0.834783 1.208688 ... 0.009548 -0.005489 0.010164 -0.008341 -0.006132 0.005441 0.003568 9.810520 2143.018556 1004.727725
56 117 M 49 0.319535 0.143602 0.193376 0.846877 4.171980 1.868911 2.581316 ... -0.033465 0.019372 -0.041478 0.004583 -0.013913 -0.007205 0.013409 10.286022 1985.712014 804.666593
19 68 M 40 0.496922 0.213975 0.293201 0.957065 3.293700 1.672811 1.894214 ... 0.023989 -0.030137 0.024769 0.000567 0.005071 -0.023693 0.009770 7.872279 2143.018556 985.160918
42 63 F 50 0.323175 0.126138 0.176197 0.907850 3.069980 1.372204 1.869070 ... 0.004152 -0.001733 0.030566 -0.037259 -0.020167 -0.042955 -0.083977 9.167460 444.730268 1169.075556

51 rows × 134 columns

In [59]:
x_test
Out[59]:
ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a ... dCCi(6) dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv}
48 89 F 45 0.155762 0.091831 0.117099 0.575170 1.575403 0.738640 0.834830 ... 0.002348 -0.002313 0.008766 0.006330 0.016645 -0.003417 0.002115 10.713432 3599.554394 2226.127951
38 42 M 60 0.182721 0.089174 0.113216 0.651748 1.839764 0.673977 1.084464 ... 0.021454 0.007635 0.019134 0.013320 0.002002 -0.022146 0.005825 15.420777 2313.388825 1381.628235
53 111 F 60 0.121065 0.079473 0.101627 0.285497 1.117740 0.558948 0.615284 ... -0.005255 0.017626 -0.007311 -0.006541 -0.012625 0.013987 0.014222 11.261019 3201.250289 1960.299963
7 28 M 58 1.210548 0.726523 0.661670 1.205596 8.492104 4.109625 5.377768 ... 0.007534 0.017088 0.015907 -0.006379 0.057303 -0.001919 -0.008007 8.422353 1964.218942 759.068477
22 78 F 64 0.437128 0.265519 0.349086 0.493971 3.205305 1.515726 2.119909 ... -0.060397 0.056134 -0.069202 -0.040180 -0.058987 0.044552 0.077897 2.986929 1800.351911 784.205580
12 46 F 50 0.199868 0.132266 0.157546 0.454599 1.467287 0.706004 0.819799 ... 0.001819 0.011291 -0.001916 0.001513 -0.002502 0.002014 0.010446 10.950821 3164.536485 1553.425003
25 92 F 39 2.061820 1.219720 1.280772 1.306779 15.095252 7.876805 9.839396 ... 0.009492 -0.033642 -0.032431 -0.056497 -0.003210 0.019443 0.004703 7.986808 3164.536485 1734.620853
44 77 F 40 0.349185 0.213207 0.227957 0.347673 3.395795 1.826527 2.123809 ... -0.042489 -0.021018 0.049357 0.005870 -0.027345 -0.026138 0.009430 10.594026 3515.050257 1925.062482
32 4 F 53 0.598550 0.350577 0.378646 0.654399 4.224992 2.312947 2.678596 ... 0.001768 -0.003924 0.001995 0.004019 -0.007207 0.004614 0.003154 7.615608 2921.471038 1230.982918
20 72 F 64 0.797801 0.470116 0.522910 1.663179 7.778152 3.640445 4.355894 ... 0.028207 -0.052002 -0.038210 0.028143 0.033156 0.010924 0.020823 5.924309 1880.538263 482.819916
37 28 M 35 0.758571 0.464961 0.413086 0.605323 9.271523 5.707370 5.150374 ... -0.003686 0.018992 0.034957 0.016890 -0.009376 -0.008998 -0.007465 12.892692 2888.617021 1653.804580
16 58 M 58 5.391649 3.217293 3.321567 5.991336 29.441589 16.791944 18.368778 ... -0.002691 0.008994 0.025390 0.040231 0.003503 -0.006546 -0.024835 8.008742 1921.927690 583.380671
62 129 F 68 1.336216 0.815757 0.733197 0.981928 11.224542 5.295879 6.994751 ... 0.025679 0.015712 0.013437 0.025113 0.008852 -0.010132 -0.008458 10.670669 3201.250289 2284.051658

13 rows × 134 columns

In [60]:
y_train
Out[60]:
30    1
39    0
54    0
52    0
45    0
61    0
28    1
58    0
23    1
33    0
41    0
15    1
31    0
27    1
26    1
59    0
35    0
40    0
29    1
0     1
18    1
55    0
13    1
57    0
6     1
14    1
51    0
9     1
3     1
1     1
43    0
4     1
36    0
11    1
10    1
63    0
47    0
21    1
60    0
17    1
34    0
24    1
8     1
2     1
50    0
46    0
49    0
5     1
56    0
19    1
42    0
Name: Diagnosis (ALS), dtype: int64
In [61]:
y_test
Out[61]:
48    0
38    0
53    0
7     1
22    1
12    1
25    1
44    0
32    0
20    1
37    0
16    1
62    0
Name: Diagnosis (ALS), dtype: int64
In [62]:
y_test.shape
Out[62]:
(13,)

ONE HOT ENCODER

In [63]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
In [64]:
categorical_cols=x_train.select_dtypes(include=["object"]).columns
print("CATEGORICAL COLUMNS :",categorical_cols)
numeric_cols=x_train.select_dtypes(include=["number"]).columns
numeric_cols=r[["Diagnosis (ALS)",'Age']]
print("NUMERIC COLUMNS:",numeric_cols)
CATEGORICAL COLUMNS : Index(['Sex'], dtype='object')
NUMERIC COLUMNS:     Diagnosis (ALS)  Age
0                 1   58
1                 1   57
2                 1   58
3                 1   70
4                 1   66
..              ...  ...
59                0   43
60                0   63
61                0   67
62                0   68
63                0   60

[64 rows x 2 columns]
In [65]:
 r.describe()
Out[65]:
ID Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a S11_a ... dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv} Diagnosis (ALS)
count 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 ... 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000 64.000000
mean 67.875000 56.390625 0.658951 0.379242 0.395886 0.945496 5.072071 2.617924 3.078959 4.393513 ... 0.003393 0.006224 -0.002889 -0.002152 -0.001969 0.001269 9.164473 2495.116475 1209.976405 0.484375
std 37.358198 10.203668 0.724002 0.435636 0.431926 0.791558 4.087221 2.289347 2.568471 3.327968 ... 0.033268 0.024670 0.028315 0.019542 0.018012 0.020800 2.681449 617.755856 553.694046 0.503706
min 2.000000 34.000000 0.098881 0.065791 0.092655 0.285497 0.883453 0.449099 0.497111 0.804179 ... -0.052002 -0.069202 -0.120838 -0.065735 -0.042955 -0.083977 2.276702 444.730268 48.246203 0.000000
25% 31.750000 50.750000 0.325932 0.172422 0.198274 0.538387 2.641830 1.369446 1.508064 2.123761 ... -0.008234 -0.004222 -0.009654 -0.009853 -0.012674 -0.006534 7.604734 2051.627447 800.181156 0.000000
50% 66.500000 58.000000 0.458935 0.253976 0.293405 0.698183 4.198486 1.966045 2.653666 3.634994 ... 0.000775 0.006272 -0.000937 -0.001614 -0.003921 0.000661 9.646564 2471.097222 1206.596083 0.000000
75% 98.250000 63.250000 0.772783 0.465699 0.476541 1.189025 6.259731 3.258184 3.801504 5.644808 ... 0.011561 0.019092 0.006745 0.008349 0.005578 0.009515 10.757522 2938.236560 1551.677678 1.000000
max 131.000000 80.000000 5.391649 3.217293 3.321567 5.991336 29.441589 16.791944 18.368778 20.750202 ... 0.220533 0.089766 0.080337 0.057303 0.061026 0.077897 15.420777 3599.554394 2441.219054 1.000000

8 rows × 134 columns

In [66]:
r.mode()
Out[66]:
ID Sex Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a ... dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv} Diagnosis (ALS)
0 24.0 F 60.0 0.098881 0.065791 0.092655 0.285497 0.883453 0.449099 0.497111 ... -0.052002 -0.069202 -0.120838 -0.065735 -0.042955 -0.083977 2.276702 3515.050257 48.246203 0.0
1 28.0 NaN NaN 0.121065 0.079473 0.101627 0.304952 1.117740 0.558948 0.615284 ... -0.046235 -0.041478 -0.065313 -0.058987 -0.034648 -0.031324 2.512995 NaN 177.843734 NaN
2 42.0 NaN NaN 0.155762 0.089174 0.112654 0.347673 1.467287 0.673977 0.819799 ... -0.033642 -0.038984 -0.064556 -0.048113 -0.027240 -0.024835 2.986929 NaN 359.409974 NaN
3 NaN NaN NaN 0.163026 0.091831 0.113216 0.363699 1.575403 0.706004 0.834830 ... -0.030137 -0.038739 -0.056497 -0.037710 -0.026549 -0.021879 4.825476 NaN 481.009629 NaN
4 NaN NaN NaN 0.176448 0.094282 0.117099 0.378197 1.839764 0.738640 1.071950 ... -0.028306 -0.038210 -0.040180 -0.027345 -0.026138 -0.021149 5.218871 NaN 482.819916 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
59 NaN NaN NaN 1.394205 0.839935 0.784691 1.821009 9.271523 4.754839 5.377768 ... 0.025207 0.037807 0.028143 0.016645 0.015248 0.020823 12.862700 NaN 2157.871393 NaN
60 NaN NaN NaN 1.529994 0.894926 0.809959 1.889785 9.686563 5.295879 5.687977 ... 0.028016 0.041946 0.040231 0.022928 0.019443 0.022756 12.874560 NaN 2210.936432 NaN
61 NaN NaN NaN 1.608454 0.989100 0.894706 1.951256 11.224542 5.707370 6.994751 ... 0.036827 0.042347 0.048771 0.033156 0.044552 0.034289 12.892692 NaN 2226.127951 NaN
62 NaN NaN NaN 2.061820 1.219720 1.280772 1.965493 15.095252 7.876805 9.839396 ... 0.056134 0.049357 0.051874 0.035976 0.055993 0.075071 14.651111 NaN 2284.051658 NaN
63 NaN NaN NaN 5.391649 3.217293 3.321567 5.991336 29.441589 16.791944 18.368778 ... 0.220533 0.089766 0.080337 0.057303 0.061026 0.077897 15.420777 NaN 2441.219054 NaN

64 rows × 135 columns

In [67]:
r.corr
Out[67]:
<bound method DataFrame.corr of      ID Sex  Age      J1_a      J3_a      J5_a     J55_a       S1_a      S3_a  \
0     8   M   58  0.321817  0.141230  0.199128  0.923634   6.044559  3.196477   
1    20   F   57  0.344026  0.177032  0.206458  0.827714   1.967728  0.856639   
2    21   F   58  0.264740  0.148228  0.177078  0.532566   1.850893  0.942743   
3    22   F   70  0.455793  0.174870  0.243660  0.962641   2.883768  1.284926   
4    24   M   66  0.269335  0.143961  0.167465  0.547745   2.327924  1.164109   
..  ...  ..  ...       ...       ...       ...       ...        ...       ...   
59  123   M   43  0.255799  0.123679  0.182658  0.505591   6.222031  2.876602   
60  125   M   63  0.513175  0.296489  0.334845  0.729804   9.686563  4.327943   
61  127   F   67  0.383901  0.245923  0.251359  0.415136   4.148414  2.069757   
62  129   F   68  1.336216  0.815757  0.733197  0.981928  11.224542  5.295879   
63  131   F   60  0.916706  0.566121  0.512857  1.467165   6.372832  3.251168   

        S5_a  ...   dCCi(7)   dCCi(8)   dCCi(9)  dCCi(10)  dCCi(11)  dCCi(12)  \
0   3.770575  ... -0.024467 -0.005300  0.051874 -0.037710 -0.026549 -0.021149   
1   1.179851  ...  0.002485 -0.004535 -0.000225 -0.006977 -0.012510  0.014773   
2   1.071950  ... -0.013927  0.007908  0.007960 -0.009022 -0.012488 -0.015588   
3   1.915058  ... -0.019285 -0.021768  0.020495  0.035976 -0.034648  0.008021   
4   1.420891  ... -0.005743  0.004726 -0.015247  0.003900 -0.007686 -0.003784   
..       ...  ...       ...       ...       ...       ...       ...       ...   
59  3.894294  ...  0.220533  0.089766 -0.120838 -0.004221 -0.013165  0.004642   
60  5.687977  ...  0.028016 -0.038739  0.011588 -0.011281 -0.004294  0.011239   
61  2.527213  ...  0.011685  0.007883 -0.014839  0.013859  0.011145  0.001418   
62  6.994751  ...  0.015712  0.013437  0.025113  0.008852 -0.010132 -0.008458   
63  3.539229  ... -0.046235  0.041946 -0.065313 -0.016682  0.061026 -0.005883   

          d_1         F2_i    F2_{conv}  Diagnosis (ALS)  
0    4.825476  2526.285657   833.498083                1  
1    5.729322  1985.712014   561.802625                1  
2    8.258488  2364.695972   796.723440                1  
3    5.447137  1860.172768   359.409974                1  
4    8.562517  2051.627447   817.111847                1  
..        ...          ...          ...              ...  
59   9.855665  3128.341308  1990.937097                0  
60  11.094558  1964.218942   601.076046                0  
61  12.564742  2526.285657   934.343638                0  
62  10.670669  3201.250289  2284.051658                0  
63   6.972152  2792.655884  1518.529172                0  

[64 rows x 135 columns]>
In [68]:
preprocessor=ColumnTransformer(
    transformers=[
        ('dog',
         OneHotEncoder(handle_unknown="ignore"),
         categorical_cols)
        
    ],
    remainder="passthrough"
)
In [69]:
x_train_transformed=preprocessor.fit_transform(x_train)
In [70]:
x_train_transformed
Out[70]:
array([[1.00000000e+00, 0.00000000e+00, 1.02000000e+02, ...,
        1.20360009e+01, 2.52628566e+03, 1.20026987e+03],
       [0.00000000e+00, 1.00000000e+00, 4.90000000e+01, ...,
        1.00746439e+01, 2.11978261e+03, 5.80338238e+02],
       [1.00000000e+00, 0.00000000e+00, 1.13000000e+02, ...,
        7.38863107e+00, 2.09680836e+03, 7.37085571e+02],
       ...,
       [0.00000000e+00, 1.00000000e+00, 1.17000000e+02, ...,
        1.02860219e+01, 1.98571201e+03, 8.04666593e+02],
       [0.00000000e+00, 1.00000000e+00, 6.80000000e+01, ...,
        7.87227878e+00, 2.14301856e+03, 9.85160918e+02],
       [1.00000000e+00, 0.00000000e+00, 6.30000000e+01, ...,
        9.16746005e+00, 4.44730268e+02, 1.16907556e+03]])
In [71]:
one_hot_encoded_columns=preprocessor.named_transformers_['dog'].get_feature_names_out(categorical_cols)
In [72]:
all_columns=list(one_hot_encoded_columns) + [col for col in x_train.columns if col not in categorical_cols]
In [74]:
x_train_transformed_df=pd.DataFrame(x_train_transformed, columns=all_columns)
one_hot_encoded_columns
Out[74]:
array(['Sex_F', 'Sex_M'], dtype=object)
In [75]:
x_train_transformed_df
Out[75]:
Sex_F Sex_M ID Age J1_a J3_a J5_a J55_a S1_a S3_a ... dCCi(6) dCCi(7) dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv}
0 1.0 0.0 102.0 53.0 0.561542 0.331788 0.345130 1.020709 6.074875 2.798090 ... 0.022664 0.008537 -0.000306 -0.012570 -0.048113 0.009073 -0.005201 12.036001 2526.285657 1200.269866
1 0.0 1.0 49.0 38.0 0.176448 0.103346 0.123381 0.649644 2.649815 1.410593 ... 0.016461 0.014798 -0.011568 -0.009276 -0.003996 -0.003248 -0.005049 10.074644 2119.782609 580.338238
2 1.0 0.0 113.0 62.0 0.823780 0.503064 0.497916 0.704065 6.861939 3.993216 ... 0.014494 0.003410 0.000716 -0.010791 0.004878 -0.005527 -0.001659 7.388631 2096.808356 737.085571
3 1.0 0.0 109.0 59.0 0.326851 0.215126 0.182667 0.304952 2.801295 1.669496 ... -0.000688 -0.018500 0.030142 0.002111 0.006603 -0.013728 -0.021879 8.064921 2700.419449 1228.586973
4 1.0 0.0 81.0 60.0 0.286517 0.176603 0.195712 0.692300 1.847736 0.869278 ... 0.023767 -0.020693 0.019078 -0.034556 -0.006731 -0.025442 -0.005668 12.874560 2670.464441 1309.871125
5 1.0 0.0 127.0 67.0 0.383901 0.245923 0.251359 0.415136 4.148414 2.069757 ... -0.005008 0.011685 0.007883 -0.014839 0.013859 0.011145 0.001418 12.564742 2526.285657 934.343638
6 0.0 1.0 98.0 68.0 1.076446 0.624824 0.561100 1.965493 7.001105 3.785627 ... -0.011602 0.018274 0.004485 0.005101 -0.007367 0.007041 -0.001560 11.097431 3515.050257 2157.871393
7 1.0 0.0 121.0 67.0 0.237654 0.154117 0.154312 0.544602 3.583597 1.690501 ... -0.027062 -0.002917 0.020006 -0.064556 0.014446 0.015248 -0.013882 9.311776 2670.464441 1087.940178
8 1.0 0.0 80.0 63.0 0.504802 0.253832 0.313823 1.229761 6.571067 2.833840 ... 0.013513 -0.003382 0.027770 -0.005112 -0.027102 0.012612 -0.010165 7.339732 3556.978755 2129.076098
9 0.0 1.0 6.0 41.0 1.063272 0.683918 0.504427 0.613402 3.176717 1.870164 ... 0.017293 -0.008880 -0.001511 0.003965 0.015240 -0.006500 -0.008723 10.482453 2263.284796 801.333727
10 1.0 0.0 61.0 37.0 0.818954 0.484998 0.511515 0.721296 4.942091 2.724820 ... 0.005945 0.021267 -0.008438 0.007339 -0.004838 -0.004133 0.000937 10.151495 2444.009071 1157.993834
11 0.0 1.0 55.0 61.0 1.177795 0.730069 0.569287 1.542224 5.883227 3.384535 ... 0.028733 -0.004708 -0.004383 0.006398 -0.014412 -0.005085 0.005530 7.148809 2051.627447 784.563460
12 1.0 0.0 2.0 64.0 0.219429 0.144385 0.171661 0.555528 2.054277 1.108746 ... -0.011558 -0.004168 0.018603 0.019350 0.016342 0.014127 0.022756 10.121803 2988.533127 1332.559788
13 1.0 0.0 96.0 52.0 0.475047 0.308491 0.266091 0.396462 3.686641 1.999494 ... 0.009989 0.000009 0.042347 -0.003170 0.002103 0.011023 -0.031324 11.597077 2792.655884 1457.933269
14 1.0 0.0 94.0 55.0 0.764443 0.423008 0.472764 1.699466 8.254286 3.966819 ... 0.018794 0.036827 0.029426 -0.024056 -0.065735 0.012986 0.012941 9.482607 2471.097222 1019.521207
15 0.0 1.0 123.0 43.0 0.255799 0.123679 0.182658 0.505591 6.222031 2.876602 ... -0.060395 0.220533 0.089766 -0.120838 -0.004221 -0.013165 0.004642 9.855665 3128.341308 1990.937097
16 0.0 1.0 24.0 60.0 0.391222 0.200687 0.232286 0.638498 2.324343 1.181015 ... -0.005405 -0.006063 0.028443 -0.006618 0.003011 -0.005547 -0.002806 10.525555 2700.419449 1393.549002
17 0.0 1.0 53.0 60.0 0.487857 0.208116 0.282759 1.183502 4.734293 2.542676 ... -0.003976 0.001242 0.005562 -0.000695 0.003333 0.004020 -0.005159 9.943562 2471.097222 1212.922300
18 0.0 1.0 100.0 69.0 0.511742 0.313666 0.320677 0.431433 7.417198 4.258109 ... -0.013859 0.000308 -0.038984 0.048771 -0.014493 0.008647 -0.004524 2.512995 1233.583584 48.246203
19 0.0 1.0 8.0 58.0 0.321817 0.141230 0.199128 0.923634 6.044559 3.196477 ... 0.016809 -0.024467 -0.005300 0.051874 -0.037710 -0.026549 -0.021149 4.825476 2526.285657 833.498083
20 0.0 1.0 64.0 57.0 0.426554 0.202661 0.255198 0.885479 4.387137 1.892587 ... 0.003446 0.010420 0.005288 -0.006713 0.013147 -0.017816 0.017087 11.136041 2238.670803 930.223353
21 1.0 0.0 115.0 50.0 0.462076 0.269359 0.296952 0.909990 5.020638 2.305690 ... 0.001699 -0.001674 0.012043 -0.003015 0.022928 -0.005444 -0.006394 14.651111 3515.050257 2210.936432
22 1.0 0.0 48.0 63.0 0.805433 0.335434 0.487871 1.813700 4.485662 2.192828 ... 0.005770 -0.028306 -0.035823 -0.004287 -0.014985 -0.004156 -0.005061 7.029500 2730.764545 1288.920905
23 1.0 0.0 119.0 65.0 0.356684 0.233194 0.242305 0.410246 3.251335 1.867689 ... 0.005494 -0.003761 0.006982 0.012774 0.008384 0.002823 -0.014323 10.889792 2471.097222 1549.074209
24 0.0 1.0 27.0 57.0 0.691093 0.406901 0.406287 0.765986 6.168256 3.702088 ... 0.002661 -0.012605 0.013385 0.004513 0.001568 -0.008244 0.005801 5.945219 2313.388825 1219.744513
25 1.0 0.0 52.0 62.0 1.394205 0.839935 0.703224 1.362995 4.661601 2.402909 ... -0.000823 0.025207 -0.002524 -0.006103 -0.012850 -0.015023 0.019711 12.862700 3092.653846 1552.852150
26 0.0 1.0 107.0 80.0 0.532330 0.296012 0.322217 0.650045 8.042171 4.754839 ... -0.014078 0.010696 -0.003448 -0.015049 0.010195 -0.002152 0.001881 9.472404 2238.670803 1241.852695
27 0.0 1.0 32.0 61.0 0.387730 0.213745 0.249993 0.591160 3.351240 1.874979 ... 0.004241 0.006956 -0.002013 -0.000082 0.008275 -0.013829 -0.006955 7.572111 1780.825796 838.978523
28 1.0 0.0 22.0 70.0 0.455793 0.174870 0.243660 0.962641 2.883768 1.284926 ... 0.013213 -0.019285 -0.021768 0.020495 0.035976 -0.034648 0.008021 5.447137 1860.172768 359.409974
29 1.0 0.0 20.0 57.0 0.344026 0.177032 0.206458 0.827714 1.967728 0.856639 ... 0.019235 0.002485 -0.004535 -0.000225 -0.006977 -0.012510 0.014773 5.729322 1985.712014 561.802625
30 0.0 1.0 65.0 52.0 1.529994 0.894926 0.809959 1.715071 7.627483 4.321696 ... 0.015596 -0.012218 0.015828 -0.014697 0.010636 0.004654 -0.003235 7.758796 2761.507400 1641.852909
31 0.0 1.0 24.0 66.0 0.269335 0.143961 0.167465 0.547745 2.327924 1.164109 ... 0.002948 -0.005743 0.004726 -0.015247 0.003900 -0.007686 -0.003784 8.562517 2051.627447 817.111847
32 0.0 1.0 26.0 34.0 0.163026 0.094282 0.112654 0.378197 2.617874 1.361172 ... -0.010711 0.006573 0.003950 -0.003954 0.001273 0.005890 0.000385 8.471564 2700.419449 1293.468915
33 0.0 1.0 42.0 67.0 1.608454 0.989100 0.894706 1.304613 6.031953 3.279233 ... 0.019701 0.004293 0.026891 0.080337 -0.007352 0.055993 0.075071 10.674106 2007.441819 481.009629
34 0.0 1.0 39.0 67.0 1.232990 0.561293 0.784691 1.889785 4.139457 1.915796 ... 0.031624 -0.018089 0.018493 0.006547 -0.004817 -0.003710 -0.000692 2.276702 1686.160000 669.461749
35 1.0 0.0 131.0 60.0 0.916706 0.566121 0.512857 1.467165 6.372832 3.251168 ... -0.072828 -0.046235 0.041946 -0.065313 -0.016682 0.061026 -0.005883 6.972152 2792.655884 1518.529172
36 1.0 0.0 86.0 63.0 0.753234 0.467912 0.442792 0.540327 4.801768 2.677700 ... -0.018937 0.011239 0.011488 0.015334 0.008337 0.005474 0.011687 10.515820 3057.463491 1494.054076
37 0.0 1.0 76.0 68.0 0.379367 0.223560 0.308426 1.821009 5.539475 2.551095 ... 0.008846 -0.006281 -0.004169 -0.008164 0.012667 -0.015442 0.001157 5.218871 1649.621788 177.843734
38 0.0 1.0 125.0 63.0 0.513175 0.296489 0.334845 0.729804 9.686563 4.327943 ... -0.046223 0.028016 -0.038739 0.011588 -0.011281 -0.004294 0.011239 11.094558 1964.218942 601.076046
39 0.0 1.0 62.0 57.0 0.351191 0.165077 0.227554 0.849025 3.695872 1.776465 ... 0.024865 -0.008019 0.005046 0.023446 -0.000726 -0.016418 0.010174 6.031056 2074.091402 927.063276
40 0.0 1.0 16.0 38.0 0.415366 0.254119 0.243906 0.449362 2.394697 1.292778 ... 0.000016 0.008047 -0.005823 -0.006588 -0.000030 0.003137 -0.002440 8.531677 3515.050257 2441.219054
41 1.0 0.0 84.0 55.0 0.419330 0.255329 0.338319 0.486463 4.366452 1.932596 ... -0.005714 0.012337 -0.005475 -0.001179 -0.002687 0.005369 0.034289 11.766102 2888.617021 1839.961952
42 0.0 1.0 31.0 67.0 0.537010 0.243234 0.318075 1.951256 6.568645 2.891654 ... 0.014234 0.013594 -0.003722 -0.003859 -0.011985 -0.027240 0.002325 8.760510 1840.022120 669.022078
43 1.0 0.0 21.0 58.0 0.264740 0.148228 0.177078 0.532566 1.850893 0.942743 ... 0.007199 -0.013927 0.007908 0.007960 -0.009022 -0.012488 -0.015588 8.258488 2364.695972 796.723440
44 1.0 0.0 99.0 57.0 0.098881 0.065791 0.092655 0.363699 0.883453 0.449099 ... 0.015152 -0.027019 0.037807 -0.019812 0.000436 -0.002840 -0.013161 10.108459 2143.018556 805.694015
45 1.0 0.0 85.0 55.0 0.454844 0.289458 0.293609 0.582757 2.124786 1.170689 ... 0.006111 0.003214 0.007520 -0.024862 0.016600 -0.015827 0.002088 12.189059 3092.653846 1551.286187
46 1.0 0.0 97.0 39.0 0.463874 0.290374 0.278926 0.550290 2.449028 1.372602 ... -0.000234 0.011520 0.009453 0.002072 -0.002811 0.004577 -0.009058 10.616077 3164.536485 2037.766311
47 0.0 1.0 25.0 51.0 0.339593 0.182070 0.204186 0.505987 1.969217 0.834783 ... 0.009548 -0.005489 0.010164 -0.008341 -0.006132 0.005441 0.003568 9.810520 2143.018556 1004.727725
48 0.0 1.0 117.0 49.0 0.319535 0.143602 0.193376 0.846877 4.171980 1.868911 ... -0.033465 0.019372 -0.041478 0.004583 -0.013913 -0.007205 0.013409 10.286022 1985.712014 804.666593
49 0.0 1.0 68.0 40.0 0.496922 0.213975 0.293201 0.957065 3.293700 1.672811 ... 0.023989 -0.030137 0.024769 0.000567 0.005071 -0.023693 0.009770 7.872279 2143.018556 985.160918
50 1.0 0.0 63.0 50.0 0.323175 0.126138 0.176197 0.907850 3.069980 1.372204 ... 0.004152 -0.001733 0.030566 -0.037259 -0.020167 -0.042955 -0.083977 9.167460 444.730268 1169.075556

51 rows × 135 columns

DECISIONTREECLASSIFIER

In [76]:
models=DecisionTreeClassifier(criterion="entropy",max_depth=10,min_samples_split=3)
In [79]:
models.fit(x_train_transformed_df,y_train)
Out[79]:
DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=3)
In [80]:
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
In [88]:
pred=models.predict(x_train_transformed_df)
pred
Out[88]:
array([1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 1, 0], dtype=int64)
In [90]:
accuracy_score(y_train,pred)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[90], line 1
----> 1 accuracy_score(y_train,pred)

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:192, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    187 validate_parameter_constraints(
    188     parameter_constraints, params, caller_name=func.__qualname__
    189 )
    191 try:
--> 192     return func(*args, **kwargs)
    193 except InvalidParameterError as e:
    194     # When the function is just a wrapper around an estimator, we allow
    195     # the function to delegate validation to the estimator, but we replace
    196     # the name of the estimator by the name of the function in the error
    197     # message to avoid confusion.
    198     msg = re.sub(
    199         r"parameter of \w+ must be",
    200         f"parameter of {func.__qualname__} must be",
    201         str(e),
    202     )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:221, in accuracy_score(y_true, y_pred, normalize, sample_weight)
    155 """Accuracy classification score.
    156 
    157 In multilabel classification, this function computes subset accuracy:
   (...)
    217 0.5
    218 """
    220 # Compute accuracy for each possible representation
--> 221 y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    222 check_consistent_length(y_true, y_pred, sample_weight)
    223 if y_type.startswith("multilabel"):

File ~\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:86, in _check_targets(y_true, y_pred)
     59 def _check_targets(y_true, y_pred):
     60     """Check that y_true and y_pred belong to the same classification task.
     61 
     62     This converts multiclass or binary types to a common shape, and raises a
   (...)
     84     y_pred : array or indicator matrix
     85     """
---> 86     check_consistent_length(y_true, y_pred)
     87     type_true = type_of_target(y_true, input_name="y_true")
     88     type_pred = type_of_target(y_pred, input_name="y_pred")

File ~\anaconda3\Lib\site-packages\sklearn\utils\validation.py:397, in check_consistent_length(*arrays)
    395 uniques = np.unique(lengths)
    396 if len(uniques) > 1:
--> 397     raise ValueError(
    398         "Found input variables with inconsistent numbers of samples: %r"
    399         % [int(l) for l in lengths]
    400     )

ValueError: Found input variables with inconsistent numbers of samples: [54, 51]
In [91]:
print(classification_report(y_train,pred))
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[91], line 1
----> 1 print(classification_report(y_train,pred))

File ~\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:2310, in classification_report(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)
   2195 def classification_report(
   2196     y_true,
   2197     y_pred,
   (...)
   2204     zero_division="warn",
   2205 ):
   2206     """Build a text report showing the main classification metrics.
   2207 
   2208     Read more in the :ref:`User Guide <classification_report>`.
   (...)
   2307     <BLANKLINE>
   2308     """
-> 2310     y_type, y_true, y_pred = _check_targets(y_true, y_pred)
   2312     if labels is None:
   2313         labels = unique_labels(y_true, y_pred)

File ~\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:86, in _check_targets(y_true, y_pred)
     59 def _check_targets(y_true, y_pred):
     60     """Check that y_true and y_pred belong to the same classification task.
     61 
     62     This converts multiclass or binary types to a common shape, and raises a
   (...)
     84     y_pred : array or indicator matrix
     85     """
---> 86     check_consistent_length(y_true, y_pred)
     87     type_true = type_of_target(y_true, input_name="y_true")
     88     type_pred = type_of_target(y_pred, input_name="y_pred")

File ~\anaconda3\Lib\site-packages\sklearn\utils\validation.py:397, in check_consistent_length(*arrays)
    395 uniques = np.unique(lengths)
    396 if len(uniques) > 1:
--> 397     raise ValueError(
    398         "Found input variables with inconsistent numbers of samples: %r"
    399         % [int(l) for l in lengths]
    400     )

ValueError: Found input variables with inconsistent numbers of samples: [54, 51]

LOGISTICREGRESSION

In [92]:
models=LogisticRegression()
models.fit(x_train_transformed_df,y_train)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[92], line 2
      1 models=LogisticRegression()
----> 2 models.fit(x_train_transformed_df,y_train)

File ~\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py:1196, in LogisticRegression.fit(self, X, y, sample_weight)
   1193 else:
   1194     _dtype = [np.float64, np.float32]
-> 1196 X, y = self._validate_data(
   1197     X,
   1198     y,
   1199     accept_sparse="csr",
   1200     dtype=_dtype,
   1201     order="C",
   1202     accept_large_sparse=solver not in ["liblinear", "sag", "saga"],
   1203 )
   1204 check_classification_targets(y)
   1205 self.classes_ = np.unique(y)

File ~\anaconda3\Lib\site-packages\sklearn\base.py:584, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
    582         y = check_array(y, input_name="y", **check_y_params)
    583     else:
--> 584         X, y = check_X_y(X, y, **check_params)
    585     out = X, y
    587 if not no_val_X and check_params.get("ensure_2d", True):

File ~\anaconda3\Lib\site-packages\sklearn\utils\validation.py:1124, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
   1106 X = check_array(
   1107     X,
   1108     accept_sparse=accept_sparse,
   (...)
   1119     input_name="X",
   1120 )
   1122 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator)
-> 1124 check_consistent_length(X, y)
   1126 return X, y

File ~\anaconda3\Lib\site-packages\sklearn\utils\validation.py:397, in check_consistent_length(*arrays)
    395 uniques = np.unique(lengths)
    396 if len(uniques) > 1:
--> 397     raise ValueError(
    398         "Found input variables with inconsistent numbers of samples: %r"
    399         % [int(l) for l in lengths]
    400     )

ValueError: Found input variables with inconsistent numbers of samples: [51, 54]

GRIDSEARCH

In [93]:
x=pd.get_dummies(x)
In [94]:
x
Out[94]:
ID Age J1_a J3_a J5_a J55_a S1_a S3_a S5_a S11_a ... dCCi(8) dCCi(9) dCCi(10) dCCi(11) dCCi(12) d_1 F2_i F2_{conv} Sex_F Sex_M
0 8 58 0.321817 0.141230 0.199128 0.923634 6.044559 3.196477 3.770575 5.709480 ... -0.005300 0.051874 -0.037710 -0.026549 -0.021149 4.825476 2526.285657 833.498083 False True
1 20 57 0.344026 0.177032 0.206458 0.827714 1.967728 0.856639 1.179851 2.050048 ... -0.004535 -0.000225 -0.006977 -0.012510 0.014773 5.729322 1985.712014 561.802625 True False
2 21 58 0.264740 0.148228 0.177078 0.532566 1.850893 0.942743 1.071950 1.502212 ... 0.007908 0.007960 -0.009022 -0.012488 -0.015588 8.258488 2364.695972 796.723440 True False
3 22 70 0.455793 0.174870 0.243660 0.962641 2.883768 1.284926 1.915058 2.929953 ... -0.021768 0.020495 0.035976 -0.034648 0.008021 5.447137 1860.172768 359.409974 True False
4 24 66 0.269335 0.143961 0.167465 0.547745 2.327924 1.164109 1.420891 2.141512 ... 0.004726 -0.015247 0.003900 -0.007686 -0.003784 8.562517 2051.627447 817.111847 False True
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
59 123 43 0.255799 0.123679 0.182658 0.505591 6.222031 2.876602 3.894294 5.697601 ... 0.089766 -0.120838 -0.004221 -0.013165 0.004642 9.855665 3128.341308 1990.937097 False True
60 125 63 0.513175 0.296489 0.334845 0.729804 9.686563 4.327943 5.687977 9.912757 ... -0.038739 0.011588 -0.011281 -0.004294 0.011239 11.094558 1964.218942 601.076046 False True
61 127 67 0.383901 0.245923 0.251359 0.415136 4.148414 2.069757 2.527213 3.362755 ... 0.007883 -0.014839 0.013859 0.011145 0.001418 12.564742 2526.285657 934.343638 True False
62 129 68 1.336216 0.815757 0.733197 0.981928 11.224542 5.295879 6.994751 11.706090 ... 0.013437 0.025113 0.008852 -0.010132 -0.008458 10.670669 3201.250289 2284.051658 True False
63 131 60 0.916706 0.566121 0.512857 1.467165 6.372832 3.251168 3.539229 5.627211 ... 0.041946 -0.065313 -0.016682 0.061026 -0.005883 6.972152 2792.655884 1518.529172 True False

64 rows × 135 columns

In [95]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.15,random_state=42)
In [96]:
model=DecisionTreeClassifier()
In [97]:
grid={
    'criterion':["gini"],
    'min_samples_split':[2,4,6,8],
    'max_depth':[10,6,8]
}
In [98]:
gscv=GridSearchCV(estimator=model,param_grid=grid,cv=5,n_jobs-1)
  Cell In[98], line 1
    gscv=GridSearchCV(estimator=model,param_grid=grid,cv=5,n_jobs-1)
                                                                   ^
SyntaxError: positional argument follows keyword argument
In [ ]:
gscv.fit(x_train,y_train)
In [ ]: